### Reproduce Appendix Figure 4 and 5 (FFRISP)

### PREP
rm(list = ls())
#setwd(..)
library(foreign)
library(gam)
library(Hmisc)

data1 <- read.spss("FFRISPWave4_JAN_09.sav")
attach(data1)

########################### PREP DATA FOR  GAM MODELS #######################################

# Create smoking status variables 
  weights <- finalwgt/mean(finalwgt, na.rm=TRUE)
  
  neversmoker <- as.numeric(Q52=="No")
  currentsmoker <- as.numeric(as.numeric(Q52B)<3)
  currentsmoker[neversmoker==1] <- 0
  formersmoker <- as.numeric(as.numeric(Q52B)==3)
  formersmoker[neversmoker==1] <- 0
  
  currentvsformer <- currentsmoker
  currentvsformer[neversmoker==1] <- NA
  
  currentvsnever <- currentsmoker
  currentvsnever[formersmoker==1] <- NA
  
  smokestatus <- neversmoker+2*formersmoker+3*currentsmoker


# Calculate expected cancer rates for smokers and nonsmokers
  table(SMOKE)
  summary(Q108A) #Non-Smoker 1
  summary(Q108B) #Smoker 1
  
  nonsmokercancer <- Q108A
  nonsmokercancernofollow <- Q108A
  smokercancer <- Q108B
  smokercancernofollow <- Q108B
  
  nonsmokercancer[SMOKE=="One" & !is.na(Q110)==TRUE] <- Q110[SMOKE=="One" & !is.na(Q110)==TRUE]
  smokercancer[SMOKE=="Two" & !is.na(Q110)==TRUE] <- Q110[SMOKE=="Two" & !is.na(Q110)==TRUE]
  
  summary(Q111A) #Non-Smoker 2
  summary(Q111B) #Smoker 2
  
  nonsmokercancer[SMOKE=="Two" & !is.na(SMOKE)==TRUE] <- Q111A[SMOKE=="Two" & !is.na(SMOKE)==TRUE]
  nonsmokercancernofollow[SMOKE=="Two" & !is.na(SMOKE)==TRUE] <- Q111A[SMOKE=="Two" & !is.na(SMOKE)==TRUE]
  
  smokercancer[SMOKE=="One" & !is.na(SMOKE)==TRUE] <- Q111B[SMOKE=="One" & !is.na(SMOKE)==TRUE]
  smokercancernofollow[SMOKE=="One" & !is.na(SMOKE)==TRUE] <- Q111B[SMOKE=="One" & !is.na(SMOKE)==TRUE]
  
  nonsmokercancer[SMOKE=="Two" & !is.na(Q113)==TRUE] <- Q113[SMOKE=="Two" & !is.na(Q113)==TRUE]
  smokercancer[SMOKE=="One" & !is.na(Q113)==TRUE] <- Q113[SMOKE=="One" & !is.na(Q113)==TRUE]
  
  wtd.table(nonsmokercancernofollow, weights)$sum.of.weights/sum(wtd.table(nonsmokercancernofollow, weights)$sum.of.weights) #find % who said 500 initially
  wtd.table(smokercancernofollow, weights)$sum.of.weights/sum(wtd.table(smokercancernofollow, weights)$sum.of.weights) #find % who said 500 initially
  
  (sum(weights[SMOKE=="One" & !is.na(Q109) & Q109=="Because I don't know"])+sum(weights[SMOKE=="Two" & !is.na(Q112) & Q112=="Because I don't know"]))/sum(wtd.table(smokercancernofollow, weights)$sum.of.weights) # % who said 500 because they do not know
  (sum(weights[SMOKE=="Two" & !is.na(Q109)  & Q109=="Because I don't know"])+sum(weights[SMOKE=="One" & !is.na(Q112)  & Q112=="Because I don't know"]))/sum(wtd.table(nonsmokercancernofollow, weights)$sum.of.weights) # % who said 500 because they do not know

# Weighted t-test to check if order of questions is related to results
  order1 <- glm(smokercancer~SMOKE, weight=weights)
  order2 <- glm(nonsmokercancer~SMOKE, weight=weights)

# Create variable for attributable risk 
  smokerdif <- smokercancer-nonsmokercancer
  smokerdifnofollow <- smokercancernofollow-nonsmokercancernofollow
  smokerdifnoneg <- smokerdif
  smokerdifnoneg[smokerdif<0 & !is.na(smokerdif)] <- 0

# Create variable for relative risk  
  rateincrease <- (smokercancer+1)/(nonsmokercancer+1)
  rateincreasenofollow <- (smokercancernofollow+1)/(nonsmokercancernofollow+1)
  rateincreasenoneg <- rateincrease
  rateincreasenoneg[smokerdif<0 & !is.na(smokerdif)] <- 1
  
  rateincrease2 <- rateincrease
  rateincrease2[nonsmokercancer==0 & !is.na(nonsmokercancer)] <- smokercancer[nonsmokercancer==0 & !is.na(nonsmokercancer)]/1
  rateincrease2[smokercancer==0] <- NA
  
  sr <- log(rateincrease)
  srnf <- log(rateincreasenofollow)
  srnn <- log(rateincreasenoneg)

# Create data frame for predicted values 
  varset <- data.frame(smokerdif, rateincrease, sr, nonsmokercancer, smokercancer)

# Flag cases where incorrect questions was asked on certainty (N=3) 
  findproblems <- !is.na(Q114B) & smokerdif>=0
  findproblems2 <- !is.na(Q114A) & smokerdif!=0
  prob <- findproblems+findproblems2

# Misc recoding 
  questionorder <- as.numeric(SMOKE)

# Set weights to mean to 1 
  weights <- finalwgt/mean(finalwgt, na.rm=TRUE)

# Filter to drop cases w/ missing data or relative risks of <-450
  filter <- !is.na(sr) & smokerdif>=-450 #& smokercancer!=500 & nonsmokercancer!=500
  filter2 <- !is.na(sr) & smokercancer!=500 & nonsmokercancer!=500
  certfilter <- filter*(as.numeric(prob==0))
  
  cvffilt <- filter*!is.na(currentvsformer)
  cvnfilt <- filter*!is.na(currentvsnever)

############################# CREATE GAM MODEL TABLES#######################################

################################## Figure S4: Current vs former
  
### Relative vs. Absolute risk
  # Quadrant 1 graph - page 2 of pdf output 
  # Quadrant 4 graph - page 3 of pdf output 
  
  smoking.gam1 <- gam(currentvsformer ~ s(smokercancer) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  pdf("current_vs_former_absolute.pdf")
    plot(smoking.gam1, xlab="Relative Risk", se=TRUE, ylab="Probability of Being a Current Smoker", ask=F)
    plot(smoking.gam1, xlab="Absolute Risk", se=TRUE, ylab="Probability of Being a Current Smoker", ask=F)
  dev.off()

### Relative vs. Attributable risk
  # Quadrant 2 graph - page 2 of pdf output
  # Quadrant 3 graph - page 3 of pdf output 
  smoking.gam1a <- gam(currentvsformer ~ s(smokerdif) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  pdf("current_vs_former_attributable.pdf")
    plot(smoking.gam1a, xlab="Relative Risk", se=TRUE, ylab="Probability of Being a Current Smoker", ask=F)
    plot(smoking.gam1a, xlab="Attibutable Risk", se=TRUE, ylab="Probability of Being a Current Smoker", ask=F)
  dev.off()


################################## Figure S5: Current vs never

  ### Relative vs. Absolute risk
    # Quadrant 1 graph - page 2 of pdf output 
    # Quadrant 4 graph - page 3 of pdf output 
  
  smoking.gam3 <- gam(currentvsnever ~ s(smokercancer) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  pdf("current_vs_never_absolute.pdf")
    plot(smoking.gam3, xlab="Relative Risk", se=TRUE, ylab="Probability of Being a Current Smoker", ask=F)
    plot(smoking.gam3, xlab="Absolute Risk", se=TRUE, ylab="Probability of Being a Current Smoker", ask=F)
  dev.off()
  
  ### Relative vs. Attributable risk
    # Quadrant 2 graph - page 2 of pdf output
    # Quadrant 3 graph - page 3 of pdf output 

  smoking.gam3a <- gam(currentvsnever ~ s(smokerdif) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  pdf("current_vs_neverr_attributed.pdf")
    plot(smoking.gam3a, xlab="Relative Risk", se=TRUE, ylab="Probability of Being a Current Smoker", ask=F)
    plot(smoking.gam3a, xlab="Attibutable Risk", se=TRUE, ylab="Probability of Being a Current Smoker", ask=F)
  dev.off()


